# Merging with AP3 model 
library(pacman)
p_load(
  here, data.table, fst, readxl, stringr, collapse, dplyr, lubridate
)

# Goal: Calculate marginal damages for each power plant 
# 1) Load emissions rates for each plant (so2, co2, nox, pm2.5)
# 2) Merge plants to marginal damages per ton of pollutant (using fips and stack height)
# 3) Multiply emissions rates by marginal damages

# TODO: stack heights to median of fuel type for missing observations
calc_marginal_damages = function(
  SOCIAL_COST_OF_CARBON = 51*0.7993, # 51 in 2022 dollars to 2014
  time_bounds =  c(ymd_hms('2019-01-01 10:00:00'), ymd_hms('2019-12-31 23:00:00'))
){
# Loading Data ----------------------------------------------------------------
  print('Loading data')
  # Unit information table, taking most recent year for each plant
  oge_plant_info_dt =
    read.fst(
      path = here("Data/electricity-generation/plant-info-dt-oge.fst"),
      as.data.table = TRUE
    )[year %in% unique(year(time_bounds))]
  oge_plant_info_unique_dt = 
    oge_plant_info_dt[, 
      .SD[which.max(year)], 
      by = .(plant_id_eia)
    ]
  # Now the emissions rates 
  emissions_rate_dt_raw = 
    merge(
      read.fst(
        here('Data/electricity-generation/plant-model-fit/emissions-rate-dt.fst'),
        as.data.table = TRUE
      ),
      oge_plant_info_unique_dt,
      by = 'plant_id_eia'
    )[,.(
      plant_id_eia, 
      fips, 
      pollutant, 
      # For now we only have low/medium damages.
      stack_height = fcase(
        stack_height < 250, 'low',
        stack_height >= 250, 'medium',# & stack_height <= 500,
        #stack_height > 500, 'tall'
        default = 'low'
      ),
      output, 
      net_generation_mwh_split, 
      #tot_gload,
      # Converting emissions rates to tons/mwh, so2+nox+co2 in lbs
      emissions_rate_tons = emissions_rate/2000
    )]
  # Reading in pm2.5 emissions rates 
  avg_emissions_dt = fread(
    here('Data/electricity-generation/output/emissions-rate-avg-oge.csv')
  )
  avg_fc_emissions_dt = fread(
    here('Data/electricity-generation/output/emissions-rate-avg-fc-oge.csv')
  )
  # loading plant id's we have elec model for 
  regular_plants =
    list.files(here("Data/electricity-generation/plant-model-fit/coefs")) |> 
    str_remove("plant-mod-coefs-") |>
    str_remove("\\.fst") |>
    as.integer()
  
# Adding PM2.5 to rest of pollutants ------------------------------------------
  # First for the CEMS plants
  emissions_rate_dt = 
    rbind(
      emissions_rate_dt_raw[pollutant != 'pm25'],
      merge(
        emissions_rate_dt_raw[,.(
          plant_id_eia,
          fips, stack_height, 
          output, net_generation_mwh_split
        )] |>unique(),
        avg_emissions_dt[,.(
          plant_id_eia,
          pollutant = 'pm25',
          emissions_rate_tons = pm25_tons_per_mwh
        )],
        by = c('plant_id_eia')
      ),
      use.names = TRUE
    )
# Fixing emissions rates for problem plants -----------------------------------
  # These 5 plants were identified as having some underlying data issues 
  # in their emissions. Setting to the median emissions for fuel type
  emissions_rate_dt = 
    join(
      emissions_rate_dt,
      oge_plant_info_unique_dt[,.(plant_id_eia, fuel_category)], 
      on = 'plant_id_eia'
    ) |>
    join(
      melt(
        avg_fc_emissions_dt, 
        id.vars = 'fuel_category'
      )[str_detect(variable, '50'),.(
        fuel_category, 
        pollutant = str_remove(variable, '_tons_per_mwh_p50_fc'), 
        fc_median = value
      )],
      on = c('fuel_category','pollutant')
    ) %>%
    .[, # Setting to median for plants with bad data
      emissions_rate_tons := fifelse(
        plant_id_eia %in% c(2221, 2528, 2503, 50626, 50931), 
        fc_median, 
        emissions_rate_tons
      )
    ]
# Now for marginal damages ----------------------------------------------------
  print('damages')
  fips_dt = read_xlsx(
    path = here('Data/Marginal Damages (2011) from Holland, Mansur, Muller, Yates AER forthcoming.xlsx'),
    sheet = 'ground-level MD per ton'
    ) %>%  
    data.table() %>% 
    .[,.( # Miami-Dade changed their fips code
      fips = fcase(
        fips == '12025', '12086',
        fips != '12025', str_pad(fips, 5,'left','0')
      )
    )]
  # Only have medium and low stack heights (not tall)
  md_dt = 
    rbind(
      cbind(
        fips_dt,
        fread(here("Data/AP3/JointFolder/md_L_2014_CS.csv"))[,stack_height := 'low']
      ),
      cbind(
        fips_dt,
        fread(here("Data/AP3/JointFolder/md_M_2014_CS.csv"))[,stack_height := 'medium']
      )
    ) |> 
    setnames(new = c('fips','nh3','nox','pm25','so2','voc','stack_height')) |>
    melt(
      id.vars = c('fips','stack_height'),
      variable.name = 'pollutant',
      value.name = 'marginal_damage'
    )
  # Merging emissions data
  emissions_md_dt_all = 
    merge(
      emissions_rate_dt, 
      md_dt,
      by = c('fips','stack_height','pollutant'),
      all.x = TRUE
    )
  # Adding social cost of carbon 
  emissions_md_dt_all[,marginal_damage := fcase(
    pollutant == 'co2e', SOCIAL_COST_OF_CARBON,
    pollutant != 'co2e', marginal_damage
  )]
  emissions_md_dt = 
    emissions_md_dt_all[,.(
        md_per_mwh = sum(emissions_rate_tons*marginal_damage),
        emissions_rate_tons = sum(emissions_rate_tons)),
      keyby = .(
        plant_id_eia,
        output, 
        net_generation_mwh_split,
        pollutant 
    )]
  # Saving for later
  write.fst(
    emissions_md_dt, 
    path = here(paste0(
      'Data/electricity-generation/output/emissions-md-dt-scc-',
      round(SOCIAL_COST_OF_CARBON,0),
      '.fst'
    ))
  )
  # Casting into wide format
  emissions_md_dt_wide = 
    dcast(
      emissions_md_dt,
      plant_id_eia + net_generation_mwh_split ~ output + pollutant,
      value.var = 'md_per_mwh'
    )
  setnames(
    emissions_md_dt_wide,
    new = colnames(emissions_md_dt_wide) |> str_replace('emissions_rate','md_per_mwh')
  )
  # Aggregating over all of the pollutants
  tot_md_dt_wide = emissions_md_dt[,.(
    damages_per_mwh = sum(md_per_mwh, na.rm =TRUE)),
    keyby = .(plant_id_eia, output, net_generation_mwh_split)
  ] 
  tot_md_dt = 
    dcast(
      tot_md_dt_wide,
      plant_id_eia ~ output,
      value.var = 'damages_per_mwh'
    ) |>
    setnames(
      old = c('emissions_rate_high','emissions_rate_low'),
      new = c('md_per_mwh_high','md_per_mwh_low')
    ) |>
    merge(
      emissions_md_dt_wide,
      by = c('plant_id_eia')
    )
  # Saving the results
  write.csv(
    tot_md_dt, 
    here(paste0(
      'Data/electricity-generation/output/damage-per-mwh-oge-scc-',
      round(SOCIAL_COST_OF_CARBON,0),
      '.csv'
    ))
  )
  # Cleaning up coal plants
  print('Cleaning up coal')
  tot_md_dt_clean_coal = 
    melt(
      tot_md_dt,
      id.vars = 'plant_id_eia',
      measure.vars = c('md_per_mwh_high','md_per_mwh_low')
    ) |>
    merge(
      oge_plant_info_unique_dt[,.(
        plant_id_eia, 
        nerc_adj,
        stack_height, 
        fuel_category
      )],
      by = 'plant_id_eia',
      all.x = TRUE
    ) 
  # Switching coal distribution to match natural gas distr
  tot_md_dt_clean_coal[,
    value_adj := fcase(
      fuel_category == 'coal', 
        quantile(
          tot_md_dt_clean_coal[fuel_category=='natural_gas']$value, 
          probs = ecdf(tot_md_dt_clean_coal[fuel_category=='coal']$value)(value)
        ),
      fuel_category != 'coal', value,
      is.na(fuel_category), value
    )
  ]
  # Converting back into wide table 
  tot_md_dt_clean_coal_wide = 
    dcast(
      tot_md_dt_clean_coal,
      formula = plant_id_eia ~ variable,
      value.var = 'value_adj'
    ) |>
    merge(
      tot_md_dt[,.(plant_id_eia, net_generation_mwh_split)],
      by = 'plant_id_eia'
    )
  # Saving results
  fwrite(
    tot_md_dt_clean_coal_wide,
    here(paste0(
      'Data/electricity-generation/output/damage-per-mwh-oge-clean-coal-scc-',
      round(SOCIAL_COST_OF_CARBON,0),
      '.csv'
    ))
  )   
  print('done')
}




